require(nlme)
require(reshape2)
require(ggplot2)
require(multcomp)
require(emmeans)
require(rstatix)
require(ggpubr)
require(RColorBrewer)
require(tidyverse)
require(dplyr)
require(ggbeeswarm)
require(readxl)
require(broom)
require(scales)
require(RCurl)
require(plotly)
require(ggrepel)
require(gridExtra)
require(ggExtra)
require(DT)
source("./HelperFunctions.R")


theme_point<-theme_bw()+theme(strip.background = element_blank())
theme_bar<-theme_bw()+theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1),strip.background = element_blank(),axis.ticks.x = element_blank(),axis.line.x = element_blank())
theme_boxplot<-theme_classic()+theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1),strip.background = element_blank(),axis.ticks.x = element_blank(),axis.title.x = element_blank(),axis.line.x = element_blank(),legend.position = "none")
color_panel<-c("#e35d6a","#5bb75b","#428bca","#e87810","#23496b","#ffbf00","#cc2028","#039748","pink","darkgray")
names(color_panel)<-sort(c("serum","Biomatrica","EDTA separator","EDTA","Citrate","RNA Streck","Roche","DNA Streck", "PAXgene", "ACD-A"))

color_panel1 <-  c("#039748","#039748","#ffbf00","#ffbf00","#e35d6a","#e35d6a","#5bb75b","#5bb75b","#428bca","#428bca","#23496b","#23496b","#cc2028","#cc2028","#e87810")
color_panel2 <-  brewer.pal(10, name = "Paired")
names(color_panel2) <- c("serum","Biomatrica","EDTA separator","EDTA","Citrate","RNA Streck","Roche","DNA Streck", "PAXgene", "ACD-A")
full_nr <- format_format(big.mark = ",", decimal.mark = ".", scientific = FALSE)


color_panel_spikes <- c("#e35d6a","gray","#5bb75b","#428bca")
names(color_panel_spikes) <- c("LP1","MIMAT","RC1","RC2")
color_panel_spikes2 <- c("#23496b","#ffbf00","gray","#ffbf01","orange","#23496b")
names(color_panel_spikes2) <- c("LP","RC","MIMAT","RC1","RC2","LP1")

set.seed(1)

1 Annotation

#Annotation
sample_annotation<-read_tsv("./Annotation_exRNAQC017.csv")
colnames(sample_annotation)[23]<-'TimeInterval' 
sample_annotation<-sample_annotation %>% mutate(donorID= case_when((Donor == "PNL-XNID") ~ "D5",
                                                                   (Donor == "PNL-7DEN") ~ "D1",
                                                                   (Donor == "PNL-8ZI1") ~ "D2",
                                                                   (Donor == "PNL-NLID") ~ "D3",
                                                                   (Donor == "PNL-UCH7") ~ "D4"), 
                                                SampleID= paste0(GraphKit,"_",GraphTube,"_",TimeInterval,"_",donorID),
                                                tube_kitID=paste0(GraphTube,"_",GraphKit),
                                                tube_kit_timeID=paste0(GraphTube,"_",GraphKit,"_",TimeInterval),
                                                PlasmaInputVml= PlasmaInputV/1000)
    
sample_annotation$Tube<-as.factor(sample_annotation$Tube)
sample_annotation$RNAisolation<-as.factor(sample_annotation$RNAisolation)
sample_annotation$TimeInterval<-as.factor(sample_annotation$TimeInterval)
sample_annotation$donorID<-as.factor(sample_annotation$donorID)

#Reads
miRNAs <- data.table::fread("mergedTxts/allmiRs_subs.txt", header=T, data.table = F)
spikes <- data.table::fread("mergedTxts/allspikes_subs.txt", header=T, data.table = F)
reads <- data.table::fread("mergedTxts/allreads_subs.txt", header=T, data.table = F)
contam <- data.table::fread("mergedTxts/allcontam_subs.txt", header=T, data.table = F)

reads_total<- read_tsv("./smallRNA_table.tsv")
reads_total <- full_join(reads_total, sample_annotation, by = "RNAID")

Tube.levels<-levels(reads_total$Tube)
RNAisolation.levels<-levels(reads_total$RNAisolation)
TimeInterval.levels<-levels(reads_total$TimeInterval)

#ggplot(reads_total, aes(x = TimeInterval, y = total_reads, group = donorID:RNAisolation)) +
#  geom_line(aes(color=RNAisolation))+geom_point() + facet_grid(. ~ Tube) + labs(title = "Raw Number of Reads")

spikes_sum <- spikes %>% gather(key="UniqueID",value="spikecounts",-"spikeID") %>% 
  group_by(UniqueID) %>% dplyr::summarise(counts=sum(spikecounts, na.rm=T)) %>%
  #spread(key=UniqueID,value=spikesum) %>% 
  mutate(reads="spikes")

# add sum of spikes to the total mapped reads (these were not considered as "mapped" yet)
reads_complete <- reads %>% gather(key="UniqueID",value="counts",-"reads") %>% 
  rbind(., spikes_sum) %>%
  spread(key=reads, value=counts) %>% 
  mutate(all_mapped = mapped + spikes) %>% 
  gather(key="reads",value="counts",-"UniqueID")

DT::datatable(sample_annotation %>% dplyr::select(c("RNAID","RNAisolation","PlasmaInputV","donorID","Tube","TimeInterval","Sequinconc","ERCCconc",Abbrevation="GraphKit")), rownames = TRUE, filter="top", options = list(pageLength = 10, scrollX=T), caption = "Sample annotation table")

2 RNA concentration

2.1 Endogenous vs LP spikes: relative small RNA concentration in eluate

calculation: count mapped miRNAS / count mapped LP spikes

gene_level_ratios <- rbind(
  reads %>% 
  filter(reads=="mapped_miR") %>%
  mutate(type="endogenous") %>%
    group_by(type) %>%
    dplyr::summarise_if(is.numeric, sum, na.rm=TRUE),spikes %>%
    mutate(type=gsub(".-..$","",spikeID)) %>%
    group_by(type) %>%
    dplyr::summarise_if(is.numeric, sum, na.rm=TRUE)
  ) %>%
  gather(., key="UniqueID",value="counts",-type) %>%  #long format
  spread(., key = "type", value="counts") %>%
  mutate("LPvsEndo"=LP/endogenous, "RCvsEndo"=RC/endogenous, "EndovsRC"=endogenous/RC, "EndovsLP"= endogenous/LP) %>%
  left_join(., sample_annotation %>%
              dplyr::select(c("UniqueID","RNAID","RNAisolation","Tube","SampleID","GraphTube","EluateV","PlasmaInputV", "BiologicalReplicate", "TimeInterval","Donor", "donorID")), by=c("UniqueID" = "RNAID"))  #add annotation
gene_level_ratios <- gene_level_ratios %>%
  mutate("EndovsLP_InputCorr"= (EndovsLP)*EluateV/PlasmaInputV)

Tube.levels<-levels(gene_level_ratios$Tube)
RNAisolation.levels<-levels(gene_level_ratios$RNAisolation)
TimeInterval.levels<-levels(gene_level_ratios$TimeInterval)

Linear mixed-effects model with crossed fixed effects of tube, kit and TimeInterval.

EndovsLP_m1<-lme(EndovsLP~Tube*RNAisolation*TimeInterval,
       random=~1|donorID, 
       weights = varIdent(form = ~ 1 | RNAisolation),
       data=gene_level_ratios)

anova(EndovsLP_m1)
EndovsLP_m1<-lme(EndovsLP~Tube*RNAisolation*TimeInterval,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsLP_m1)

Only Tube, and the interactions Tube:RNAisolation and Tube:TimeInterval are significant. There are no significant three-way interactions.

Next, we look at second-order interactions:

EndovsLP_m2<-lme(EndovsLP~(Tube+RNAisolation+TimeInterval)^2,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsLP_m2)
EndovsLP_m3<-lme(EndovsLP~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsLP_m3)

checking normality of residuals

qqnorm(EndovsLP_m3$residuals)
qqline(EndovsLP_m3$residuals)

EndovsLP_m.time<-emmeans(EndovsLP_m3, ~TimeInterval|Tube)
plot(EndovsLP_m.time, comparisons=TRUE)

pairs(EndovsLP_m.time)
## Tube = Citrate:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04     -6.34 33.1 74  -0.191  0.9800
##  T0 - T16    -11.16 33.1 74  -0.337  0.9395
##  T04 - T16    -4.82 33.1 74  -0.145  0.9884
## 
## Tube = EDTA:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04     19.18 33.1 74   0.579  0.8318
##  T0 - T16   -113.14 33.1 74  -3.414  0.0030
##  T04 - T16  -132.33 33.1 74  -3.993  0.0004
## 
## Tube = serum:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04     24.20 33.1 74   0.730  0.7464
##  T0 - T16     58.03 33.1 74   1.751  0.1933
##  T04 - T16    33.83 33.1 74   1.021  0.5660
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

Now, compare Kits

EndovsLP_m.method<-emmeans(EndovsLP_m3, ~Tube|RNAisolation)
plot(EndovsLP_m.method, comparisons=TRUE)

pairs(EndovsLP_m.method)
## RNAisolation = Maxwell:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     -85.1 27.1 74  -3.144  0.0067
##  Citrate - serum    -14.2 27.1 74  -0.526  0.8587
##  EDTA - serum        70.8 27.1 74   2.617  0.0286
## 
## RNAisolation = miRNeasySPAkit:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     -94.0 27.1 74  -3.473  0.0025
##  Citrate - serum   -109.0 27.1 74  -4.027  0.0004
##  EDTA - serum       -15.0 27.1 74  -0.554  0.8447
## 
## Results are averaged over the levels of: TimeInterval 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

2.2 Endogenous vs RC spikes: relative smallRNA in plasma

calculation: count mapped miRNAS / count mapped RC spikes

gene_level_ratios$GraphTube2<- paste(gene_level_ratios$Tube, gene_level_ratios$TimeInterval, sep = "_") 
test <- log_rescaling_CI(gene_level_ratios, measurevar="EndovsRC", groupvar=c("GraphTube2"))
# Plot LP/endo in log10 scale
spikes2 <- ggplot(test, aes(x=GraphTube2, y=10^measurevar_log_resc)) + 
  #geom_bar(position=position_dodge(), stat="identity")+
  geom_errorbar(aes(ymin=ci_lower_oriscale, ymax=ci_upper_oriscale), colour="grey", width=.1) +
  geom_point(aes(colour=Tube), size=1.2) +
  geom_point(data=test, aes(x=GraphTube2, y=mean_oriscale), shape=4, colour="grey") +
  geom_hline(yintercept = 1, linetype="dashed",colour="grey88") +
  facet_wrap(~RNAisolation, scales = "free_x", ncol = 2)+
  labs(x="", y="relative small RNA conc. (rescaled)", title=NULL, col = NULL, fill = NULL) +
  scale_colour_manual(values=color_panel) +
  scale_y_log10() +
  theme_bar 
ggplotly(spikes2)
EndovsRC_m1<-lme(EndovsRC~Tube*RNAisolation*TimeInterval,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsRC_m1)
anova_EndovsRC_m1 <- round(anova(EndovsRC_m1)[c(5, 6, 7), c(4)], digits = 3)
EndovsRC_m2<-lme(EndovsRC~(Tube+RNAisolation+TimeInterval)^2,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsRC_m2)
EndovsRC_m3<-lme(EndovsRC~Tube+RNAisolation+TimeInterval+Tube:TimeInterval,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsRC_m3)
EndovsRC.time<-emmeans(EndovsRC_m3, ~TimeInterval|Tube)
plot(EndovsRC.time, comparisons=TRUE)

pairs(EndovsRC.time)
## Tube = Citrate:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04   -17.647 18.1 76  -0.976  0.5945
##  T0 - T16   -18.139 18.1 76  -1.003  0.5774
##  T04 - T16   -0.493 18.1 76  -0.027  0.9996
## 
## Tube = EDTA:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04    13.113 18.1 76   0.725  0.7495
##  T0 - T16   -51.912 18.1 76  -2.870  0.0145
##  T04 - T16  -65.025 18.1 76  -3.595  0.0016
## 
## Tube = serum:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04    16.067 18.1 76   0.888  0.6493
##  T0 - T16    18.463 18.1 76   1.021  0.5662
##  T04 - T16    2.396 18.1 76   0.132  0.9904
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

EDTA has a significantly higher smallRNA concentration in plasma at time T16.

EndovsRC.time<-emmeans(EndovsRC_m3, ~Tube|TimeInterval)
plot(EndovsRC.time, comparisons=TRUE)

pairs(EndovsRC.time)
## TimeInterval = T0:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA    -64.83 18.1 76  -3.584  0.0017
##  Citrate - serum   -29.56 18.1 76  -1.634  0.2377
##  EDTA - serum       35.28 18.1 76   1.950  0.1318
## 
## TimeInterval = T04:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA    -34.07 18.1 76  -1.884  0.1504
##  Citrate - serum     4.16 18.1 76   0.230  0.9713
##  EDTA - serum       38.23 18.1 76   2.114  0.0938
## 
## TimeInterval = T16:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA    -98.61 18.1 76  -5.451  <.0001
##  Citrate - serum     7.04 18.1 76   0.389  0.9199
##  EDTA - serum      105.65 18.1 76   5.841  <.0001
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
# Pairwise comparisons
gene_pwc <- gene_level_ratios %>% group_by(Tube) %>% pairwise_t_test(EndovsRC ~ TimeInterval) %>% add_xy_position(x = "TimeInterval")

# Show p-values
# ggplot(gene_level_ratios, aes(x = TimeInterval, y = EndovsRC, color = TimeInterval)) +
#   geom_boxplot()+
  # geom_point()+
  # scale_colour_manual(values=color_panel)+
  # facet_wrap(~ Tube) +
  # stat_pvalue_manual(gene_pwc, label = "p.signif") +
  # labs(title = "Time lapse comparison within tubes (T test)", y = "miRNA vs RC concentration")

2.3 RNA Extraction efficiency

Calculation: Endo/LP * EluateV/PlasmaInputV

test <- log_rescaling_CI(gene_level_ratios, measurevar="EndovsLP_InputCorr", groupvar=c("GraphTube2"))
# Plot LP/endo in log10 scale
spikes_conc <- ggplot(test, aes(x=GraphTube2, y=10^measurevar_log_resc)) + 
  #geom_bar(position=position_dodge(), stat="identity")+
  facet_wrap(~RNAisolation, scales = "free_x", ncol = 2)+
  geom_errorbar(aes(ymin=ci_lower_oriscale, ymax=ci_upper_oriscale), colour="grey", width=.1) +
  geom_point(aes(colour=Tube), size=1.2) +
  geom_point(data=test, aes(x=GraphTube2, y=mean_oriscale), shape=4, colour="grey") +
  geom_hline(yintercept = 1, linetype="dashed",colour="grey88") +
  labs(x="", y="relative endogenous small RNA concentration", title=NULL, subtitle="endogenous small RNA vs LP (rescaled)", col = NULL) +
  scale_colour_manual(values=color_panel) +
  scale_y_log10() +
  theme_bar 

ggplotly(spikes_conc)
EndovsLP_InputCorr_m1<-lme(EndovsLP_InputCorr~Tube*RNAisolation*TimeInterval,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsLP_InputCorr_m1)
anova_EndovsLP_InputCorr_m1 <- round(anova(EndovsLP_InputCorr_m1)[c(5, 6, 7), c(4)], digits = 3)
EndovsLP_InputCorr_m2<-lme(EndovsLP_InputCorr~(Tube+RNAisolation+TimeInterval)^2,
       random=~1|donorID, 
       data=gene_level_ratios)

anova(EndovsLP_InputCorr_m2)
EndovsLP_InputCorr_m3<-lme(EndovsLP_InputCorr~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
       random= ~1|donorID, 
       data=gene_level_ratios)

anova(EndovsLP_InputCorr_m3)
qqnorm(EndovsLP_InputCorr_m3$residuals)
qqline(EndovsLP_InputCorr_m3$residuals)

EndovsLP_InputCorr_m.time<-emmeans(EndovsLP_InputCorr_m3, ~TimeInterval|Tube)
plot(EndovsLP_InputCorr_m.time, comparisons=TRUE)

pairs(EndovsLP_InputCorr_m.time)
## Tube = Citrate:
##  contrast  estimate  SE df t.ratio p.value
##  T0 - T04    -0.656 1.7 74  -0.386  0.9212
##  T0 - T16    -1.040 1.7 74  -0.612  0.8138
##  T04 - T16   -0.384 1.7 74  -0.226  0.9722
## 
## Tube = EDTA:
##  contrast  estimate  SE df t.ratio p.value
##  T0 - T04     1.602 1.7 74   0.943  0.6149
##  T0 - T16    -6.004 1.7 74  -3.535  0.0020
##  T04 - T16   -7.606 1.7 74  -4.478  0.0001
## 
## Tube = serum:
##  contrast  estimate  SE df t.ratio p.value
##  T0 - T04     1.407 1.7 74   0.828  0.6866
##  T0 - T16     2.703 1.7 74   1.591  0.2559
##  T04 - T16    1.295 1.7 74   0.763  0.7270
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
EndovsLP_InputCorr_m.method<-emmeans(EndovsLP_InputCorr_m3, ~Tube|RNAisolation)
plot(EndovsLP_InputCorr_m.method, comparisons=TRUE)

pairs(EndovsLP_InputCorr_m.method)
## RNAisolation = Maxwell:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     -8.51 1.39 74  -6.133  <.0001
##  Citrate - serum    -1.42 1.39 74  -1.027  0.5624
##  EDTA - serum        7.08 1.39 74   5.106  <.0001
## 
## RNAisolation = miRNeasySPAkit:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     -3.13 1.39 74  -2.258  0.0682
##  Citrate - serum    -3.63 1.39 74  -2.619  0.0285
##  EDTA - serum       -0.50 1.39 74  -0.360  0.9310
## 
## Results are averaged over the levels of: TimeInterval 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

3 Gene Count (Sensitivity)

number of miRNA with counts > 6

cutoff_kit <- data.frame(Tube = sample_annotation$Tube, GraphTube = sample_annotation$GraphTube, median_th = 3)
miRNAs_long <-  miRNAs %>% gather(., key="UniqueID", value="est_counts", -MIMATID) %>% #long format
  left_join(., dplyr::select(sample_annotation,c(UniqueID,RNAID,RNAisolation,GraphTube,GraphKit, SampleID, Tube, BiologicalReplicate)), by=c("UniqueID" = "RNAID"))
#keep only miRNA coding genes with more than 0 counts

miRNAs_long <-  miRNAs_long %>% filter(est_counts > 0)

number_miR_detected <-  miRNAs_long %>% group_by(SampleID) %>% dplyr::summarize(miR_above0=n()) 

number_miR_detected <- full_join(number_miR_detected, 
                                    miRNAs_long %>% group_by(SampleID) %>%
                                     dplyr::summarize(total_est_counts_above0=sum(est_counts)), 
                                   by="SampleID")
#cutoff_kit <- single_pos %>% group_by(GraphTube) %>% dplyr::summarise(median_th = median(filter_threshold))
 miRNAs_cutoff <-  miRNAs %>% gather(., key="UniqueID", value="est_counts", -MIMATID) %>% #long format
  left_join(., dplyr::select(sample_annotation,c(UniqueID,RNAID,RNAisolation,GraphTube,GraphKit, SampleID, Tube, BiologicalReplicate, TimeInterval)), by=c("UniqueID" = "RNAID")) 
 
  #left_join(., cutoff_kit, by="GraphTube") #add the median cut-off for each kit
 miRNAs_cutoff <-  miRNAs_cutoff %>% 
  filter(est_counts > 6)
 
number_miR_detected <- full_join(number_miR_detected, 
                                    miRNAs_cutoff %>% group_by(SampleID) %>%
                                     dplyr::summarize(miR_aboveTh=n()),
                                   by="SampleID")
number_miR_detected <- full_join(number_miR_detected, 
                                    miRNAs_cutoff %>% group_by(SampleID) %>%
                                     dplyr::summarize(total_est_counts_aboveTh=sum(est_counts)), 
                                   by="SampleID")
number_miR_detected <- left_join(number_miR_detected, 
                                   dplyr::select(sample_annotation, c(UniqueID,RNAID, GraphTube, GraphKit, RNAisolation, EluateV,SampleID, Tube, BiologicalReplicate, TimeInterval, donorID)),
                                   by="SampleID")
#convert to the original format
 miRNAs_cutoff <- dplyr::select(miRNAs_cutoff, MIMATID, UniqueID, est_counts, Tube, BiologicalReplicate, TimeInterval) %>% 
  spread(., key=UniqueID, value=est_counts)
#write.csv( miRNAs_cutoff, file="../../../exRNAQC/ miRNAs_cutoff.csv",row.names=FALSE, na="")
count_m1<-lme(miR_aboveTh~Tube*RNAisolation*TimeInterval,
       random=~1|donorID, 
       data=number_miR_detected)

anova(count_m1)
anova_count_m1 <- round(anova(count_m1)[c(5, 6, 7), c(4)], digits = 3)
count_m2<-lme(miR_aboveTh~(Tube+RNAisolation+TimeInterval)^2,
       random=~1|donorID, 
       data=number_miR_detected)

anova(count_m2)
count_m3<-lme(miR_aboveTh~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
       random=~1|donorID, 
       data=number_miR_detected)

anova(count_m3)
qqnorm(count_m3$residuals)
qqline(count_m3$residuals)

count_m3.time<-emmeans(count_m3, ~TimeInterval|Tube)
plot(count_m3.time, comparisons=TRUE)

pairs(count_m3.time)
## Tube = Citrate:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04      15.2 5.24 74   2.901  0.0134
##  T0 - T16      26.2 5.24 74   5.000  <.0001
##  T04 - T16     11.0 5.24 74   2.099  0.0969
## 
## Tube = EDTA:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04      -0.5 5.24 74  -0.095  0.9950
##  T0 - T16     -12.5 5.24 74  -2.385  0.0508
##  T04 - T16    -12.0 5.24 74  -2.290  0.0635
## 
## Tube = serum:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04       1.6 5.24 74   0.305  0.9499
##  T0 - T16       5.6 5.24 74   1.069  0.5364
##  T04 - T16      4.0 5.24 74   0.763  0.7265
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
count_m3.time<-emmeans(count_m3, ~Tube|TimeInterval)
plot(count_m3.time, comparisons=TRUE)

pairs(count_m3.time)
## TimeInterval = T0:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA      18.0 5.24 74   3.435  0.0028
##  Citrate - serum    -10.4 5.24 74  -1.985  0.1231
##  EDTA - serum       -28.4 5.24 74  -5.420  <.0001
## 
## TimeInterval = T04:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA       2.3 5.24 74   0.439  0.8994
##  Citrate - serum    -24.0 5.24 74  -4.580  0.0001
##  EDTA - serum       -26.3 5.24 74  -5.019  <.0001
## 
## TimeInterval = T16:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     -20.7 5.24 74  -3.950  0.0005
##  Citrate - serum    -31.0 5.24 74  -5.916  <.0001
##  EDTA - serum       -10.3 5.24 74  -1.966  0.1280
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
#T test
# Pairwise comparisons
count_pwc <-  number_miR_detected %>% group_by(TimeInterval) %>% pairwise_t_test(miR_aboveTh ~ Tube) %>% add_xy_position(x = "Tube")


# Show p-values
count_plot<-ggplot(number_miR_detected, aes(x = Tube, y = miR_aboveTh, color=Tube)) +
  geom_boxplot() +
  geom_point() +
  facet_wrap(~ as.factor(TimeInterval)) +
  stat_pvalue_manual(count_pwc, label = "p.adj") +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point + theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1), legend.position = 'none')+
  labs(y = "sensitivity")
count_plot

count_m3.method<-emmeans(count_m3, ~Tube|RNAisolation)
plot(count_m3.method, comparisons=TRUE)

pairs(count_m3.method)
## RNAisolation = Maxwell:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA      24.2 4.28 74   5.656  <.0001
##  Citrate - serum     -3.8 4.28 74  -0.888  0.6495
##  EDTA - serum       -28.0 4.28 74  -6.544  <.0001
## 
## RNAisolation = miRNeasySPAkit:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     -24.5 4.28 74  -5.718  <.0001
##  Citrate - serum    -39.8 4.28 74  -9.302  <.0001
##  EDTA - serum       -15.3 4.28 74  -3.584  0.0017
## 
## Results are averaged over the levels of: TimeInterval 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
# Pairwise comparisons
count_pwc <-  number_miR_detected %>% group_by(GraphKit) %>% pairwise_t_test(miR_aboveTh ~ Tube) %>% add_xy_position(x = "Tube")


# Show p-values
count_plot<-ggplot(number_miR_detected, aes(x = Tube, y = miR_aboveTh, color=Tube)) +
  geom_boxplot() +
  geom_point() +
  facet_wrap(~ as.factor(GraphKit)) +
  stat_pvalue_manual(count_pwc, label = "p.adj", y.position = c(215, 225, 235)) +
  scale_color_manual(values=color_panel)+
  scale_fill_manual(values=color_panel)+
  theme_point + theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1), legend.position = 'none')+
  labs(y = "sensitivity")
count_plot

ggsave("./smallSensitivity.pdf", plot = count_plot, dpi = 300)
## Saving 7 x 5 in image

4 Area left of the curve (ALC)

  • The ALC (area-left-of-curve) calculation is done according to see Mestdagh et al., Nature Methods, 2014, and represents a precision metric:
    1. Only genes that reach threshold (95% single positive (SP) elimination based on exRNAQC004) are taken into account.
    2. All zero counts are replaced by NA
    3. The log2 ratios of counts for each gene are determined, each time between 2 timepoints
    4. Then, the absolute value of these log2 ratios are taken and ranked. This is plotted for every condition
  • The faster the curve reaches 100% -> the smaller the ALC -> the better the precision (indicates that the replicates give similar counts for each gene)

The ALC values are summarized based on the average of the ALC values (after removing NAs). The dot plot at the end gives an overview of all these values.

Score: lower ALC = better

ALC_m1<-lme(ALC_calc~Tube*RNAisolation*TimePoint,
       random=~1|BiologicalReplicate, 
       data=ALC)

anova(ALC_m1)
anova_ALC_m1 <- round(anova(ALC_m1)[c(5, 6, 7), c(4)], digits = 3)

There is a significant interaction between Tube:RNAIsolatiom

Now, second-order interactions

ALC_m2<-lme(ALC_calc~(Tube+RNAisolation+TimePoint)^2,
       random=~1|BiologicalReplicate, 
       data=ALC)

anova(ALC_m2)

There interaction between Tube:RNAIsolatiom remains significant.

Now, the model keeping only the significant interaction

ALC_m3<-lme(ALC_calc~Tube+RNAisolation+TimePoint+Tube:RNAisolation,
       random=~1|BiologicalReplicate, 
       data=ALC)

anova(ALC_m3)
qqnorm(ALC_m3$residuals)
qqline(ALC_m3$residuals)

ALC_m3.methods<-emmeans(ALC_m3, ~Tube|RNAisolation)
plot(ALC_m3.methods, comparisons=TRUE)

pairs(ALC_m3.methods)
## RNAisolation = Maxwell:
##  contrast        estimate     SE df t.ratio p.value
##  Citrate - EDTA   0.00600 0.0109 49   0.548  0.8480
##  Citrate - serum  0.02292 0.0109 49   2.093  0.1017
##  EDTA - serum     0.01692 0.0109 49   1.545  0.2790
## 
## RNAisolation = miRNeasySPAkit:
##  contrast        estimate     SE df t.ratio p.value
##  Citrate - EDTA   0.04688 0.0109 49   4.283  0.0002
##  Citrate - serum  0.04910 0.0109 49   4.485  0.0001
##  EDTA - serum     0.00222 0.0109 49   0.202  0.9777
## 
## Results are averaged over the levels of: TimePoint 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
# Pairwise comparisons
ALC_pwc <- ALC %>% group_by(RNAisolation) %>% pairwise_t_test(ALC_calc ~ Tube) %>% add_xy_position(x = "Tube")

# Show p-values
alc_plot<-ggplot(ALC, aes(x = Tube, y = ALC_calc, color = Tube)) +
  geom_boxplot()+
  geom_point()+
  scale_colour_manual(values=color_panel)+
  facet_wrap(~ RNAisolation) +
  stat_pvalue_manual(ALC_pwc, label = "p.signif") +
  theme(panel.grid.major.x=element_line(linetype = "dashed",color="gray88"), axis.text.x=element_text(size=8))+
  labs(title = "Tube comparison within kits (T test)", y = "Area left of the curve")
alc_plot

5 Biotype: Percentage of miRNAs counts

We are usually interested in miRNAs, and the other biotypes are often seen as contaminants (see above). The fraction was calculated as the number of miRNA reads / total reads (mapped + spikes)

perc_miRs <- miRNAs %>% gather(key="UniqueID",value="counts", -"MIMATID") %>% mutate(type=gsub("[0-9].*$","",MIMATID)) %>%
  dplyr::group_by(UniqueID,type) %>% 
  dplyr::summarise(sum_counts = sum(counts, na.rm=T)) %>% #calculate sum of MIMAT per sample
  full_join(reads_complete %>% filter(reads=="all_mapped")) %>% 
  mutate(perc=sum_counts/counts*100) %>% #calculate perc of miRs and spikes of total mapped reads
  left_join(sample_annotation, by=c("UniqueID" = "RNAID"))
Biotype_m1<-lme(perc~Tube*RNAisolation*TimeInterval,
       random=~1|BiologicalReplicate, 
       data=perc_miRs)

anova(Biotype_m1)
anova_Biotype_m1 <- round(anova(Biotype_m1)[c(5, 6, 7), c(4)], digits = 3)

Second order:

Biotype_m2<-lme(perc~(Tube+RNAisolation+TimeInterval)^2,
       random=~1|BiologicalReplicate, 
       data=perc_miRs)

anova(Biotype_m2)

Keeping only the significant interactions:

Biotype_m3<-lme(perc~Tube+RNAisolation+TimeInterval+Tube:RNAisolation+Tube:TimeInterval,
       random=~1|BiologicalReplicate, 
       data=perc_miRs)
anova(Biotype_m3)
Biotype_m3.methods<-emmeans(Biotype_m3, ~Tube|RNAisolation)
plot(Biotype_m3.methods, comparisons=TRUE)

pairs(Biotype_m3.methods)
## RNAisolation = Maxwell:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA     -6.09 2.46 78  -2.474  0.0407
##  Citrate - serum    14.81 2.46 78   6.021  <.0001
##  EDTA - serum       20.90 2.46 78   8.495  <.0001
## 
## RNAisolation = miRNeasySPAkit:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA    -38.44 2.46 78 -15.628  <.0001
##  Citrate - serum   -15.22 2.46 78  -6.189  <.0001
##  EDTA - serum       23.22 2.46 78   9.439  <.0001
## 
## Results are averaged over the levels of: TimeInterval 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
# Pairwise comparisons
Biotype_pwc <- perc_miRs %>% group_by(RNAisolation) %>% pairwise_t_test(perc ~ Tube) %>% add_xy_position(x = "Tube")

# Show p-values
Biotype_plot<-ggplot(perc_miRs, aes(x = Tube, y = perc, color = Tube)) +
  geom_boxplot()+
  geom_point()+
  scale_colour_manual(values=color_panel)+
  facet_wrap(~ RNAisolation) +
  stat_pvalue_manual(Biotype_pwc, label = "p.signif") +
  theme(panel.grid.major.x=element_line(linetype = "dashed",color="gray88"), axis.text.x=element_text(size=8))+
  labs(title = "Tube comparison within kits (T test)", y = "miRNA counts (%)")
Biotype_plot

Biotype_m3.time<-emmeans(Biotype_m3, ~TimeInterval|Tube)
plot(Biotype_m3.time, comparisons=TRUE)

pairs(Biotype_m3.time)
## Tube = Citrate:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04     0.590 3.01 78   0.196  0.9791
##  T0 - T16    13.105 3.01 78   4.350  0.0001
##  T04 - T16   12.515 3.01 78   4.154  0.0002
## 
## Tube = EDTA:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04     1.672 3.01 78   0.555  0.8442
##  T0 - T16     2.018 3.01 78   0.670  0.7816
##  T04 - T16    0.346 3.01 78   0.115  0.9928
## 
## Tube = serum:
##  contrast  estimate   SE df t.ratio p.value
##  T0 - T04     4.579 3.01 78   1.520  0.2873
##  T0 - T16     7.874 3.01 78   2.614  0.0286
##  T04 - T16    3.295 3.01 78   1.094  0.5208
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates
Biotype_m3.time<-emmeans(Biotype_m3, ~Tube|TimeInterval)
plot(Biotype_m3.time, comparisons=TRUE)

pairs(Biotype_m3.time)
## TimeInterval = T0:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA   -18.929 3.01 78  -6.283  <.0001
##  Citrate - serum    0.208 3.01 78   0.069  0.9974
##  EDTA - serum      19.136 3.01 78   6.352  <.0001
## 
## TimeInterval = T04:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA   -17.846 3.01 78  -5.924  <.0001
##  Citrate - serum    4.197 3.01 78   1.393  0.3495
##  EDTA - serum      22.043 3.01 78   7.317  <.0001
## 
## TimeInterval = T16:
##  contrast        estimate   SE df t.ratio p.value
##  Citrate - EDTA   -30.016 3.01 78  -9.963  <.0001
##  Citrate - serum   -5.023 3.01 78  -1.667  0.2242
##  EDTA - serum      24.992 3.01 78   8.296  <.0001
## 
## Results are averaged over the levels of: RNAisolation 
## Degrees-of-freedom method: containment 
## P value adjustment: tukey method for comparing a family of 3 estimates

6 Conclusion

overview <- data.frame(row.names = c("Tube:RNAisolation","Tube:TimeInterval","RNAisolation:TimeInterval"), "RNA concentration" = anova_EndovsRC_m1, "extraction efficiency" = anova_EndovsLP_InputCorr_m1, "sensitivity" = anova_count_m1, "reproducibility" = anova_ALC_m1, "biotype" = anova_Biotype_m1, check.names = FALSE)
overview_t <- t(overview)

#breaks for heatmap
bk1 <- c(seq(0,0.051,by=0.005))

#make color palette
custom_palette = c(colorRampPalette(brewer.pal(n = 7, name =
  "RdBu"))(20))
custom_palette2 = c(custom_palette[1:10], 'white')

#make matrix with labels for heatmap
labels_tab <- overview_t
labels_tab[labels_tab<0.001]<-"<0.001"

#make heatmap
phm<-pheatmap::pheatmap(overview_t,
                   breaks = bk1,
                   display_numbers = labels_tab,fontsize_number=11, number_color="black",
                   color = custom_palette2,
                   angle_col = 45,
                   cluster_rows=FALSE, cluster_cols=FALSE)

ggsave("./smallInteractions2.pdf", plot = phm, dpi = 300)
## Saving 7 x 5 in image